{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "b6ee1938",
   "metadata": {},
   "source": [
    "[**Grand, G., Blank, I.A., Pereira, F. and Fedorenko, E., 2022. Semantic projection recovers rich human knowledge of multiple object features from word embeddings. _Nature Human Behaviour_, pp.1-13.m**](https://www.nature.com/articles/s41562-022-01316-8)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "7f8a956c",
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Help on class Text2Mind in module cntext.mind:\n",
      "\n",
      "class Text2Mind(builtins.object)\n",
      " |  Text2Mind(w2v_model_path='glove_w2v.6B.100d.txt')\n",
      " |  \n",
      " |  Calculate cognitive (attitude, bias) direction and strength in text\n",
      " |  \n",
      " |  Methods defined here:\n",
      " |  \n",
      " |  __init__(self, w2v_model_path='glove_w2v.6B.100d.txt')\n",
      " |      Init the Text2Mind\n",
      " |      :param w2v_model_path:  pretained embedding model file path, only support word2vec format pretained model！\n",
      " |  \n",
      " |  sematic_distance(self, words, c_words1, c_words2)\n",
      " |      Calculate the distance from words with c_words1 and c_words2 respectively, and return the difference between the two distance.\n",
      " |      Greater than 0 means semantically closer to c_words2\n",
      " |      \n",
      " |      :param words: words list, words = ['program', 'software', 'computer']\n",
      " |      :param c_words1: concept words1, c_words1 = [\"man\", \"he\", \"him\"]\n",
      " |      :param c_words2: concept words2, c_words2 = [\"woman\", \"she\", \"her\"]\n",
      " |      :return:\n",
      " |  \n",
      " |  sematic_projection(self, words, c_words1, c_words2)\n",
      " |      Calculate the projected length of each word in the concept vector.Note that the calculation result reflects the direction of concept.\n",
      " |      Greater than 0 means semantically closer to c_words2.\n",
      " |      \n",
      " |      Refer \"Grand, G., Blank, I.A., Pereira, F. and Fedorenko, E., 2022. Semantic projection recovers rich human knowledge of multiple object features from word embeddings. _Nature Human Behaviour_, pp.1-13.\"\n",
      " |      \n",
      " |      For example, in the size concept, if you want positive means big, and negative means small,\n",
      " |      you should set c_words1 = [\"small\", \"little\", \"tiny\"] c_words2 = [\"large\", \"big\", \"huge\"].\n",
      " |      \n",
      " |      :param words: words list\n",
      " |      :param c_words1: concept words1, c_words1 = [\"small\", \"little\", \"tiny\"]\n",
      " |      :param c_words2: concept words2, c_words2 = [\"large\", \"big\", \"huge\"]\n",
      " |      :param c_vector: concept_vector; the result of .build_concept(c_words1, c_words2)\n",
      " |      :return:\n",
      " |  \n",
      " |  ----------------------------------------------------------------------\n",
      " |  Data descriptors defined here:\n",
      " |  \n",
      " |  __dict__\n",
      " |      dictionary for instance variables (if defined)\n",
      " |  \n",
      " |  __weakref__\n",
      " |      list of weak references to the object (if defined)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import cntext as ct\n",
    "\n",
    "help(ct.Text2Mind)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cc08f1f7",
   "metadata": {},
   "source": [
    "glove_w2v.6B.100d.txt link\n",
    "- google driver https://drive.google.com/file/d/1tuQB9PDx42z67ScEQrg650aDTYPz-elJ/view?usp=sharing\n",
    "- baidu pan 链接: https://pan.baidu.com/s/1MMfQ7M0YCzL9Klp4zrlHBw 提取码: 72l0 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "4ccb9bcf",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Now is loading the model of data/glove_w2v.6B.100d.txt\n",
      "Load successfully, used 20.54 s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<cntext.mind.Text2Mind at 0x7f961a704760>"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tm = ct.Text2Mind(w2v_model_path='data/glove_w2v.6B.100d.txt')\n",
    "tm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "d6456d1d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('mouse', -1.68),\n",
       " ('cat', -0.92),\n",
       " ('pig', -0.46),\n",
       " ('whale', -0.24),\n",
       " ('horse', 0.4)]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "animals = ['mouse', 'cat', 'horse',  'pig', 'whale']\n",
    "smalls = [\"small\", \"little\", \"tiny\"]\n",
    "bigs = [\"large\", \"big\", \"huge\"]\n",
    "\n",
    "# In size conception, mouse is smallest, horse is biggest.\n",
    "# 在大小概念上，老鼠最小，马是最大的。\n",
    "tm.sematic_projection(words=animals, \n",
    "                      c_words1=smalls, \n",
    "                      c_words2=bigs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "012c3b8e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-0.37"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "engineer = ['program', 'software', 'computer']\n",
    "mans =  [\"man\", \"he\", \"him\"]\n",
    "womans = [\"woman\", \"she\", \"her\"]\n",
    "\n",
    "#在语义空间中，工程师更接近于男人，而不是女人。\n",
    "#in semantic space, engineer is closer to man, other than woman.\n",
    "tm.sematic_distance(words=animals, \n",
    "                    c_words1=mans, \n",
    "                    c_words2=womans)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bdddf0d1",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f4093d09",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "71af654b",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2f897687",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
